
import time
from webdriver_manager.chrome import ChromeDriverManager
import os
import sys
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(BASE_DIR)
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

import tqdm

# Crawl the Science News "all stories" listing in a Chrome session and record,
# for every article linked from the listing, whether any paragraph of the
# article body contains one of KEYWORDS.
#
# Files written to the current working directory:
#   err_url   - one "<page> <position>" line per listing entry whose article
#               link could not be read.
#   all_state - one "<page> <position> <True|False>" line per visited article,
#               where <position> is the 1-based index of the entry on its
#               listing page.

# Listing pages to crawl, as the half-open range [FIRST_PAGE, LAST_PAGE).
FIRST_PAGE = 1
LAST_PAGE = 2
# Seconds to pause around each page load.
PAGE_DELAY = 3
LISTING_URL = "https://www.sciencenews.org/all-stories/page/"
LISTING_ITEM_XPATH = "/html/body/div[1]/div[1]/main/section/div/ol/li"
# Evaluated relative to one listing <li> element.
ARTICLE_LINK_XPATH = "./div/h3/a"
PARAGRAPH_XPATH = "/html/body/div[1]/div[1]/main/article/div[3]/div/div/p"
KEYWORDS = ("China", "china")

option = webdriver.ChromeOptions()
# option.add_argument("headless")
s = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=s, options=option)
urls = []   # every article URL collected, across all pages
count = 0   # number of articles that mention a keyword at least once
try:
    with open("err_url", "w", encoding="utf-8") as err_url_file, \
            open("all_state", "w", encoding="utf-8") as correct_url:
        for page in tqdm.trange(FIRST_PAGE, LAST_PAGE):
            time.sleep(PAGE_DELAY)
            driver.get(LISTING_URL + str(page))
            elements = driver.find_elements(by=By.XPATH, value=LISTING_ITEM_XPATH)

            # (position, url) pairs for this page only. The position is kept
            # with the URL so the report line identifies the right entry.
            urls_middle = []
            for i, item in enumerate(elements, start=1):
                try:
                    link = item.find_element(by=By.XPATH, value=ARTICLE_LINK_XPATH)
                    url = link.get_attribute("href")
                except Exception:
                    # Best effort: note the entry that has no readable link
                    # and carry on with the rest of the page.
                    url = None
                if not url:
                    err_url_file.write(f"{page} {i}\n")
                    continue
                urls.append(url)
                urls_middle.append((i, url))

            for i, url in urls_middle:
                driver.get(url)
                time.sleep(PAGE_DELAY)
                p_s = driver.find_elements(by=By.XPATH, value=PARAGRAPH_XPATH)
                flag = False
                for p in p_s:
                    text = p.text  # read once; each access queries the browser
                    if any(keyword in text for keyword in KEYWORDS):
                        flag = True
                        break
                if flag:
                    # Counted once per article, not once per matching paragraph.
                    count += 1
                correct_url.write(f"{page} {i} {flag}\n")
finally:
    # Always shut the browser down, even if a page load fails midway.
    driver.quit()

print("all:"+str(len(urls)))
print("china: "+str(count))
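# Disabled alternative: fetch each article with `requests` instead of the
# browser and search the raw response text.
# print(len(urls))
# for url in urls:
#     paper=requests.get(url=url)
#     time.sleep(1)
#     if "China" in paper.text or "china" in paper.text:
#        count+=1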
        